In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [7]:
# Read the turnover CSV using the ISO-8859-1 encoding (read_csv already yields a DataFrame).
file = pd.read_csv('turnover.csv', encoding='ISO-8859-1')
# Re-wrap the parsed table under the name `df`, which the plotting cells below use.
df = pd.DataFrame(data=file)
# Preview the first five rows.
df.head(n=5)
Out[7]:
stag event gender age industry profession traffic coach head_gender greywage way extraversion independ selfcontrol anxiety novator
0 7.030801 1 m 35.0 Banks HR rabrecNErab no f white bus 6.2 4.1 5.7 7.1 8.3
1 22.965092 1 m 33.0 Banks HR empjs no m white bus 6.2 4.1 5.7 7.1 8.3
2 15.934292 1 f 35.0 PowerGeneration HR rabrecNErab no m white bus 6.2 6.2 2.6 4.8 8.3
3 15.934292 1 f 35.0 PowerGeneration HR rabrecNErab no m white bus 5.4 7.6 4.9 2.5 6.7
4 8.410678 1 m 32.0 Retail Commercial youjs yes f white bus 3.0 4.1 8.0 7.1 3.7

Diagrams of the categorical variables¶

In [8]:
# Grid layout for the categorical bar charts. The number of grid columns is
# fixed; the number of rows is derived from how many object-dtype columns the
# frame has, so the grid never runs out of subplot slots.
columns = 2
df_objects = df.select_dtypes(include=[object])
rows = max(1, -(-len(df_objects.columns) // columns))  # ceiling division

# One grouped bar chart per categorical column: counts of each category,
# split by the value of `event`.
fig = plt.figure(figsize=(10, 5 * rows))  # 5 inches of height per grid row
for c, i in enumerate(df_objects.columns.values, start=1):  # c = 1-based subplot index
    ax = fig.add_subplot(rows, columns, c)
    pd.crosstab(df[i], df.event).plot(kind='bar', ax=ax)
    ax.set_title('Freqence   {}'.format(i))
    ax.set_ylabel('Frecuence')
    ax.set_xlabel('{}'.format(i))

# Compute the layout once, after every subplot exists, rather than on each iteration.
fig.tight_layout(pad=4.0)
No description has been provided for this image
In [8]:
 

Diagrams of the continuous variables¶

In [8]:
 
In [9]:
import plotly.express as px

# Create a dictionary mapping event values to colors
color_map = {0: 'blue', 1: 'orange'}

# Select the numeric columns to plot. The dtypes must be passed as ONE list:
# `[np.float64] or [np.int64]` evaluates to just `[np.float64]` (a non-empty
# list is truthy), which silently ignores integer columns.
# `event` is dropped because it is the colour grouping, not a variable to plot.
df_objects2 = (
    df.select_dtypes(include=[np.float64, np.int64])
      .drop(columns=['event'], errors='ignore')
)

# One grouped histogram (with a marginal box plot) per numeric column,
# coloured by the value of `event`.
for i in df_objects2.columns.values:
    fig = px.histogram(df, x=i, color='event', marginal='box', barmode='group', color_discrete_map=color_map)
    fig.show()
In [15]:
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import numpy as np

# Columns whose string categories are replaced by integer codes.
categorical_columns = [
    'gender', 'industry', 'profession', 'traffic',
    'coach', 'head_gender', 'greywage', 'way',
]

# Work on a copy so the original string-valued frame `df` stays available.
turnover2 = df.copy()

# Fit a separate encoder per column and keep each one, so the code -> category
# mapping (`encoders[col].classes_`) can still be looked up afterwards.
encoders = {}
for column in categorical_columns:
    encoder = LabelEncoder()
    turnover2[column] = encoder.fit_transform(turnover2[column])
    encoders[column] = encoder

# Replace +/-inf with NaN before handing the numeric columns to seaborn.
df = df.replace([np.inf, -np.inf], np.nan)
# Pairwise scatter/density plots of the numeric columns, coloured by `event`;
# the trailing semicolon suppresses the returned object's repr.
sns.pairplot(data=df[['stag', 'age', 'extraversion', 'independ', 'selfcontrol', 'anxiety', 'novator', 'event']], hue='event');
C:\Users\q\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\q\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\q\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\q\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\q\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\q\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

C:\Users\q\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning:

use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.

No description has been provided for this image
In [11]:
import plotly.express as px

# Pairwise correlation matrix of the label-encoded frame, rounded to two
# decimals so the annotated cells stay readable.
df_corr = turnover2.corr().round(decimals=2)

# Render the matrix as an annotated 800x800 heatmap.
fig = px.imshow(
    df_corr,
    text_auto=True,
    labels={"color": "Correlation"},
    width=800,
    height=800,
)
fig.show()

PCA¶

In [12]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Feature matrix: every column of the encoded frame except the target `event`.
turnover3 = turnover2.drop(columns=['event'])
X = turnover3

# NOTE(review): X is passed to PCA without standardization, so the leading
# components are presumably dominated by the columns with the largest raw
# variance. StandardScaler is imported but unused here; confirm whether
# scaling was intended.

# Fit PCA once with the maximum number of components. The explained-variance
# ratios for k components are the first k entries of this vector, so there is
# no need to refit the model for every k.
max_components = min(X.shape)
pca = PCA(n_components=max_components)
X_pca = pca.fit_transform(X)

# Print the ratios for 0, 1, ..., max_components components.
for i in range(0, max_components + 1):
    print(i,'_components', pca.explained_variance_ratio_[:i])
0 _components []
1 _components [0.92512714]
2 _components [0.92512714 0.03786064]
3 _components [0.92512714 0.03786064 0.0139859 ]
4 _components [0.92512714 0.03786064 0.0139859  0.00596456]
5 _components [0.92512714 0.03786064 0.0139859  0.00596456 0.00483203]
6 _components [0.92512714 0.03786064 0.0139859  0.00596456 0.00483203 0.00373711]
7 _components [0.92512714 0.03786064 0.0139859  0.00596456 0.00483203 0.00373711
 0.00333072]
8 _components [0.92512714 0.03786064 0.0139859  0.00596456 0.00483203 0.00373711
 0.00333072 0.00238391]
9 _components [0.92512714 0.03786064 0.0139859  0.00596456 0.00483203 0.00373711
 0.00333072 0.00238391 0.0012091 ]
10 _components [9.25127140e-01 3.78606386e-02 1.39859027e-02 5.96455689e-03
 4.83202750e-03 3.73711090e-03 3.33071880e-03 2.38391146e-03
 1.20909591e-03 5.36221602e-04]
11 _components [9.25127140e-01 3.78606386e-02 1.39859027e-02 5.96455689e-03
 4.83202750e-03 3.73711090e-03 3.33071880e-03 2.38391146e-03
 1.20909591e-03 5.36221602e-04 3.52046866e-04]
12 _components [9.25127140e-01 3.78606386e-02 1.39859027e-02 5.96455689e-03
 4.83202750e-03 3.73711090e-03 3.33071880e-03 2.38391146e-03
 1.20909591e-03 5.36221602e-04 3.52046866e-04 2.89974727e-04]
13 _components [9.25127140e-01 3.78606386e-02 1.39859027e-02 5.96455689e-03
 4.83202750e-03 3.73711090e-03 3.33071880e-03 2.38391146e-03
 1.20909591e-03 5.36221602e-04 3.52046866e-04 2.89974727e-04
 2.02351217e-04]
14 _components [9.25127140e-01 3.78606386e-02 1.39859027e-02 5.96455689e-03
 4.83202750e-03 3.73711090e-03 3.33071880e-03 2.38391146e-03
 1.20909591e-03 5.36221602e-04 3.52046866e-04 2.89974727e-04
 2.02351217e-04 1.12501183e-04]
15 _components [9.25127140e-01 3.78606386e-02 1.39859027e-02 5.96455689e-03
 4.83202750e-03 3.73711090e-03 3.33071880e-03 2.38391146e-03
 1.20909591e-03 5.36221602e-04 3.52046866e-04 2.89974727e-04
 2.02351217e-04 1.12501183e-04 7.58014646e-05]